"""
Interface to hold the contents of API responses when they do not conform
to the OpenAI-style response.
"""

from typing import Literal, Optional, List, AsyncGenerator, Union, Dict, Any
from pydantic import BaseModel
from dataclasses import dataclass, field


class Function(BaseModel):
    """Represents a function call."""

    # Both fields are required (neither has a default). This model is the
    # `function` payload of ChatCompletionMessageToolCall.
    # NOTE(review): `arguments` is presumably a JSON-encoded string of the call
    # arguments (OpenAI tool-call convention) -- confirm against callers.
    arguments: str
    name: str


class ChatCompletionMessageToolCall(BaseModel):
    """Represents a tool call in a chat completion message."""

    # All three fields are required (none has a default).
    id: str  # identifier of this tool call (presumably provider-assigned -- confirm)
    function: Function  # name/arguments payload of the call
    type: Literal["function"]  # "function" is the only accepted value


class Message(BaseModel):
    """Represents a message in a chat completion."""

    # Every field is optional and defaults to None, so a bare Message() is valid.
    content: Optional[str] = None
    # NOTE(review): presumably reasoning text that some providers return
    # separately from `content` -- confirm against the provider adapters.
    reasoning_content: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
    # When set, role must be one of the four literals listed here.
    role: Optional[Literal["user", "assistant", "system", "tool"]] = None
    refusal: Optional[str] = None


class CompletionTokensDetails(BaseModel):
    """Details about the tokens used in a completion."""

    # Every counter below is optional and defaults to None, so the model can be
    # built from a payload that omits any of them. It is embedded in
    # CompletionUsage.completion_tokens_details.

    accepted_prediction_tokens: Optional[int] = None
    """
    When using Predicted Outputs, the number of tokens in the prediction that
    appeared in the completion.
    """

    audio_tokens: Optional[int] = None
    """Audio input tokens generated by the model."""

    reasoning_tokens: Optional[int] = None
    """Tokens generated by the model for reasoning."""

    rejected_prediction_tokens: Optional[int] = None
    """
    When using Predicted Outputs, the number of tokens in the prediction that did
    not appear in the completion. However, like reasoning tokens, these tokens are
    still counted in the total completion tokens for purposes of billing, output,
    and context window limits.
    """


class PromptTokensDetails(BaseModel):
    """Details about the tokens used in a prompt."""

    # Every counter is optional and defaults to None, so the model can be built
    # from a payload that omits any of them. It is embedded in
    # CompletionUsage.prompt_tokens_details.

    text_tokens: Optional[int] = None
    """Text tokens present in the prompt."""

    audio_tokens: Optional[int] = None
    """Audio input tokens present in the prompt."""

    cached_tokens: Optional[int] = None
    """Cached tokens present in the prompt."""


class CompletionUsage(BaseModel):
    """Represents the token usage for a completion."""

    # All fields are optional and default to None. This model only stores the
    # numbers it is given: nothing here computes total_tokens or checks that it
    # equals prompt_tokens + completion_tokens.

    completion_tokens: Optional[int] = None
    """Number of tokens in the generated completion."""

    prompt_tokens: Optional[int] = None
    """Number of tokens in the prompt."""

    total_tokens: Optional[int] = None
    """Total number of tokens used in the request (prompt + completion)."""

    completion_tokens_details: Optional[CompletionTokensDetails] = None
    """Breakdown of tokens used in a completion."""

    prompt_tokens_details: Optional[PromptTokensDetails] = None
    """Breakdown of tokens used in the prompt."""


class Word(BaseModel):
    """Represents a single word with timing information."""

    # `word`, `start` and `end` are required; every other field defaults to None.
    # NOTE(review): start/end are presumably offsets in seconds from the start
    # of the audio -- confirm per provider.
    word: str
    start: float
    end: float
    confidence: Optional[float] = None  # Common across Deepgram, Azure, AWS
    speaker: Optional[int] = None  # Speaker diarization (Deepgram, Azure, AWS)
    speaker_confidence: Optional[float] = None  # Speaker identification confidence
    punctuated_word: Optional[str] = None  # Word with punctuation (some providers)


class Segment(BaseModel):
    """Represents a segment of transcribed text with detailed information."""

    # Required fields: id, seek, start, end, text. Everything else defaults to
    # None so providers that do not report a value can simply omit it.
    # NOTE(review): start/end are presumably seconds and `seek` presumably the
    # Whisper seek offset -- confirm against the provider adapters.
    id: int
    seek: int
    start: float
    end: float
    text: str
    # OpenAI Whisper specific fields
    tokens: Optional[List[int]] = None
    temperature: Optional[float] = None
    avg_logprob: Optional[float] = None
    compression_ratio: Optional[float] = None
    no_speech_prob: Optional[float] = None
    # Common ASR provider fields
    confidence: Optional[float] = None  # Segment-level confidence
    speaker: Optional[int] = None  # Primary speaker for this segment
    speaker_confidence: Optional[float] = None  # Speaker identification confidence
    words: Optional[List[Word]] = None  # Words within this segment


class Alternative(BaseModel):
    """Represents an alternative transcription hypothesis (common in many ASR APIs)."""

    # Only `transcript` is required; confidence and word-level detail are optional.
    transcript: str
    confidence: Optional[float] = None
    words: Optional[List[Word]] = None  # per-word entries for this hypothesis, when given


class Channel(BaseModel):
    """Represents a single audio channel (for multi-channel audio)."""

    # `alternatives` is required (it may be an empty list; no minimum length is
    # enforced here). `search` entries are plain dicts, so their keys are not
    # validated by this model.
    alternatives: List[Alternative]
    search: Optional[List[dict]] = None  # Search results if keyword search enabled


class TranscriptionResult(BaseModel):
    """
    Unified transcription result format supporting multiple ASR providers.
    Based on OpenAI Whisper API but extended for common ASR features.
    """

    # `text` is the only required field; every other field defaults to None.
    # Fields typed as `dict` / `List[dict]` are pass-through containers: this
    # model does not validate their keys or values.

    # Core fields (supported by most providers)
    text: str
    language: Optional[str] = None
    confidence: Optional[float] = None  # Overall transcription confidence

    # OpenAI Whisper specific fields
    task: Optional[str] = None  # "transcribe" or "translate"
    duration: Optional[float] = None
    segments: Optional[List[Segment]] = None
    words: Optional[List[Word]] = None

    # Multi-channel and alternatives support (Deepgram, Azure, etc.)
    channels: Optional[List[Channel]] = None
    alternatives: Optional[List[Alternative]] = None

    # Advanced features (various providers)
    utterances: Optional[List[dict]] = None  # Speaker utterances
    paragraphs: Optional[List[dict]] = None  # Paragraph detection
    topics: Optional[List[dict]] = None  # Topic detection
    intents: Optional[List[dict]] = None  # Intent recognition
    sentiment: Optional[dict] = None  # Sentiment analysis
    summary: Optional[dict] = None  # Auto-summarization

    # Metadata
    metadata: Optional[dict] = None  # Provider-specific metadata
    # NOTE(review): the name `model_info` starts with the `model_` prefix that
    # pydantic v2 treats as a protected namespace; some v2 releases emit a
    # UserWarning for such fields -- confirm against the pinned pydantic version.
    model_info: Optional[dict] = None  # Model information


class StreamingTranscriptionChunk(BaseModel):
    """Represents a single chunk of streaming transcription data."""

    # `text` and `is_final` are required; all remaining fields default to None.
    # NOTE(review): `is_final` presumably distinguishes finalized text from
    # interim hypotheses, and start_time/end_time are presumably seconds --
    # confirm against the streaming provider adapters.
    text: str
    is_final: bool
    confidence: Optional[float] = None
    start_time: Optional[float] = None
    end_time: Optional[float] = None
    speaker_id: Optional[int] = None
    speaker_confidence: Optional[float] = None
    words: Optional[List[Word]] = None
    sequence_number: Optional[int] = None
    channel: Optional[int] = None
    provider_data: Optional[dict] = None  # untyped dict; contents are not validated here


# Type alias for streaming transcription responses: an async generator that
# yields StreamingTranscriptionChunk items and whose send type is None.
StreamingTranscriptionResponse = AsyncGenerator[StreamingTranscriptionChunk, None]

# Union type for both batch and streaming responses: either a complete
# TranscriptionResult or the async chunk generator aliased above.
TranscriptionResponse = Union[TranscriptionResult, StreamingTranscriptionResponse]


@dataclass
class TranscriptionOptions:
    """Provider-agnostic container for speech-to-text request options.

    Every option defaults to ``None`` (meaning "not specified") except
    ``custom_parameters``, which defaults to a fresh empty dict per instance.
    Range checks run in ``__post_init__`` and raise ``ValueError``.
    """

    # -- Language ---------------------------------------------------------
    language: Optional[str] = None

    # -- Input audio description -----------------------------------------
    audio_format: Optional[str] = None
    sample_rate: Optional[int] = None
    channels: Optional[int] = None
    encoding: Optional[str] = None  # audio encoding type

    # -- Shape of the returned transcript --------------------------------
    response_format: Optional[str] = None
    include_word_timestamps: Optional[bool] = None
    include_segment_timestamps: Optional[bool] = None
    timestamp_granularities: Optional[List[str]] = None  # OpenAI: ["word", "segment"]

    # -- Hints that steer recognition ------------------------------------
    prompt: Optional[str] = None
    context_phrases: Optional[List[str]] = None
    boost_phrases: Optional[List[str]] = None

    # -- Speaker diarization ---------------------------------------------
    enable_speaker_diarization: Optional[bool] = None
    max_speakers: Optional[int] = None
    min_speakers: Optional[int] = None

    # -- Text post-processing --------------------------------------------
    enable_automatic_punctuation: Optional[bool] = None
    enable_profanity_filter: Optional[bool] = None
    enable_smart_formatting: Optional[bool] = None
    enable_word_confidence: Optional[bool] = None
    enable_spoken_punctuation: Optional[bool] = None
    enable_spoken_emojis: Optional[bool] = None

    # -- Analysis add-ons ------------------------------------------------
    enable_sentiment_analysis: Optional[bool] = None
    enable_topic_detection: Optional[bool] = None
    enable_intent_recognition: Optional[bool] = None
    enable_summarization: Optional[bool] = None
    enable_translation: Optional[bool] = None
    translation_target_language: Optional[str] = None

    # -- Confidence scores and alternative hypotheses ---------------------
    include_confidence_scores: Optional[bool] = None
    max_alternatives: Optional[int] = None

    # -- Decoding / runtime behaviour ------------------------------------
    temperature: Optional[float] = None
    interim_results: Optional[bool] = None
    vad_sensitivity: Optional[float] = None
    stream: Optional[bool] = None  # enable streaming output

    # -- Free-form extras (each instance gets its own dict) ---------------
    custom_parameters: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Reject out-of-range values right after construction.

        Checks run in a fixed order (temperature, max_speakers, min_speakers,
        speaker-bound ordering, vad_sensitivity); the first failure wins.

        Raises:
            ValueError: if ``temperature`` or ``vad_sensitivity`` is outside
                [0.0, 1.0], if either speaker bound is below 1, or if
                ``min_speakers`` exceeds ``max_speakers``.
        """
        temp = self.temperature
        if temp is not None:
            # Chained comparison kept on purpose: NaN fails it and is rejected.
            if not (0.0 <= temp <= 1.0):
                raise ValueError("temperature must be between 0.0 and 1.0")

        most, fewest = self.max_speakers, self.min_speakers
        if most is not None and most < 1:
            raise ValueError("max_speakers must be at least 1")
        if fewest is not None and fewest < 1:
            raise ValueError("min_speakers must be at least 1")

        both_bounds_given = most is not None and fewest is not None
        if both_bounds_given and fewest > most:
            raise ValueError("min_speakers cannot be greater than max_speakers")

        vad = self.vad_sensitivity
        if vad is not None:
            if not (0.0 <= vad <= 1.0):
                raise ValueError("vad_sensitivity must be between 0.0 and 1.0")

    def has_any_parameters(self) -> bool:
        """Return True when at least one option carries a value.

        ``custom_parameters`` counts only when it is non-empty; every other
        attribute counts when it is not ``None`` (so ``False`` and ``0`` count).
        """
        return any(
            bool(value) if name == "custom_parameters" else value is not None
            for name, value in vars(self).items()
        )

    def get_set_parameters(self) -> Dict[str, Any]:
        """Return a name -> value mapping of the options that carry a value.

        Uses the same rule as ``has_any_parameters``. Keys follow the
        instance's attribute order, and values are the stored objects
        themselves (not copies).
        """
        return {
            name: value
            for name, value in vars(self).items()
            if (bool(value) if name == "custom_parameters" else value is not None)
        }
